In [1]:
import re
from io import StringIO

import pandas as pd
from bokeh.charts import Bar, TimeSeries
from bokeh.io import output_notebook, show
# Load BokehJS into the notebook (the cell output confirms the load) so that
# later show() calls can display their charts here.
output_notebook()


BokehJS successfully loaded.

Issues closed


In [2]:
# Closed-issue counts per milestone, embedded as CSV text.
issues = """
milestone,n closed
0.1,"2 closed"
"0.5.1","4 closed"
0.3,"14 closed"
0.4,"15 closed"
"0.6.1","15 closed"
0.2,"16 closed"
"0.8.1","21 closed"
0.6,"21 closed"
0.5,"35 closed"
"0.8.2","71 closed"
0.9,"93 closed"
"0.9.1","105 closed"
"0.7.1","117 closed"
0.7,"144 closed"
0.8,"147 closed"
"""

# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement.
# The milestone column holds values such as "0.5.1", so it is read as text and
# sorted lexicographically, which orders these single-digit versions correctly.
issues_df = (
    pd.read_csv(StringIO(issues))
    .sort_values('milestone')
    .set_index('milestone')
)
# Keep the leading integer of strings such as "14 closed" as a numeric column.
issues_df['n'] = issues_df['n closed'].str.split(' closed').str[0].astype(int)
issues_df.head()


Out[2]:
n closed n
milestone
0.1 2 closed 2
0.2 16 closed 16
0.3 14 closed 14
0.4 15 closed 15
0.5 35 closed 35

In [3]:
# Bar chart of the numeric closed-issue count, one bar per milestone.
issues_bar = Bar(
    issues_df[['n']],
    title='Issues closed by milestone',
    xlabel='milestone',
    ylabel='# issues closed',
    tools='previewsave',
)
show(issues_bar)


PyPI downloads


In [4]:
# Download counts per released file, copied from the "pypi vanity" output.
# Columns are separated by runs of spaces.

pypi= """
package    date    count
bokeh-0.4.2p1.tar.gz    2014-03-13         2973
       bokeh-0.2.tgz    2013-10-25         2414
    bokeh-0.3.tar.gz    2013-11-19         4110
    bokeh-0.4.tar.gz    2014-02-04         2495
  bokeh-0.4.1.tar.gz    2014-02-19         2817
     bokeh-0.4.4.zip    2014-04-16         1716
  bokeh-0.4.4.tar.gz    2014-04-16         4703
     bokeh-0.5.0.zip    2014-07-08         1333
  bokeh-0.5.0.tar.gz    2014-07-08         2051
     bokeh-0.5.1.zip    2014-07-23         1405
  bokeh-0.5.1.tar.gz    2014-07-23         2438
     bokeh-0.5.2.zip    2014-08-15         1189
  bokeh-0.5.2.tar.gz    2014-08-15         2265
     bokeh-0.6.0.zip    2014-09-10         1208
  bokeh-0.6.0.tar.gz    2014-09-09         2018
     bokeh-0.6.1.zip    2014-09-25         1604
  bokeh-0.6.1.tar.gz    2014-09-25         5939
     bokeh-0.7.0.zip    2014-12-05          832
  bokeh-0.7.0.tar.gz    2014-12-05         3675
     bokeh-0.7.1.zip    2015-01-12          668
  bokeh-0.7.1.tar.gz    2015-01-12         2292
     bokeh-0.8.0.zip    2015-02-16          451
  bokeh-0.8.0.tar.gz    2015-02-16          870
     bokeh-0.8.1.zip    2015-02-23          654
  bokeh-0.8.1.tar.gz    2015-02-23         3457
     bokeh-0.8.2.zip    2015-03-25          692
  bokeh-0.8.2.tar.gz    2015-03-25         6601
     bokeh-0.9.0.zip    2015-05-15          553
  bokeh-0.9.0.tar.gz    2015-05-15        16109
     bokeh-0.9.1.zip    2015-07-04          206
  bokeh-0.9.1.tar.gz    2015-07-04         1632
"""
# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement.
# Rows are ordered chronologically and keep their original integer labels.
pypi_df = pd.read_table(
    StringIO(pypi), sep=r' +', engine='python', parse_dates=['date']
).sort_values('date')

# Compiled once. Dots are escaped so only a literal ".tar.gz", ".zip" or ".tgz"
# counts as the extension, and "$" requires it to end the string.
_PACKAGE_RE = re.compile(r'bokeh-(?P<version>.+?)\.(?:tar\.gz|zip|tgz)$')


def get_version(package_string):
    """Return the version embedded in a bokeh distribution file name.

    Parameters
    ----------
    package_string : str
        File name such as ``'bokeh-0.4.2p1.tar.gz'`` or ``'bokeh-0.5.0.zip'``.

    Returns
    -------
    str
        The text between ``'bokeh-'`` and the archive extension,
        e.g. ``'0.4.2p1'``.

    Raises
    ------
    ValueError
        If the name does not look like ``bokeh-<version>.<tar.gz|zip|tgz>``.
    """
    match = _PACKAGE_RE.search(package_string)
    if match is None:
        # Fail with the offending name instead of an AttributeError on None.
        raise ValueError(
            'not a recognised bokeh package name: %r' % (package_string,)
        )
    return match.group('version')
    
# Derive each file's release version from its package file name.
pypi_df['version'] = pypi_df['package'].map(get_version)
pypi_df.head()


Out[4]:
package date count version
1 bokeh-0.2.tgz 2013-10-25 2414 0.2
2 bokeh-0.3.tar.gz 2013-11-19 4110 0.3
3 bokeh-0.4.tar.gz 2014-02-04 2495 0.4
4 bokeh-0.4.1.tar.gz 2014-02-19 2817 0.4.1
0 bokeh-0.4.2p1.tar.gz 2014-03-13 2973 0.4.2p1

In [5]:
# Total downloads per version. Only `count` is summed: a frame-wide sum()
# would also try to add up the datetime `date` column and the string `package`
# column, which current pandas no longer drops silently.
pypi_by_version = pypi_df.groupby('version')[['count']].sum().reset_index()

# Attach a date to each version. A version can have several files, so the merge
# yields one row per file; keep only the first row for each version.
pypi_version_date = (
    pypi_by_version
    .merge(pypi_df[['date', 'version']], on='version')
    .drop_duplicates('version')
)
pypi_version_date.head()


Out[5]:
version count date
0 0.2 2414 2013-10-25
1 0.3 4110 2013-11-19
2 0.4 2495 2014-02-04
3 0.4.1 2817 2014-02-19
4 0.4.2p1 2973 2014-03-13

In [6]:
# Plot version
# Download counts indexed by version string, which is the frame Bar receives.
plot_version = pypi_version_date.set_index('version')[['count']]

version_bar = Bar(
    plot_version,
    title='Pypi downloads by version',
    xlabel='version',
    ylabel='pypi downloads',
    tools='previewsave',
)
show(version_bar)



In [7]:
# Plot date
# Download counts indexed by date, which is the frame TimeSeries receives.
plot_date = pypi_version_date.set_index('date')[['count']]

downloads_series = TimeSeries(
    plot_date,
    title='Pypi downloads by date',
    xlabel='date',
    ylabel='pypi downloads',
    tools='previewsave',
)
show(downloads_series)



In [ ]: